Visualizing and Understanding Convolutional Networks

Matthew D. Zeiler, Rob Fergus

https://arxiv.org/abs/1311.2901


In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt

In [16]:
from tqdm import tqdm
import time

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
mnist = input_data.read_data_sets('../MNIST_data/',one_hot=True)
print(mnist.train.images.shape)
print(mnist.test.images.shape)


Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz
(55000, 784)
(10000, 784)

Implementing a CNN


In [4]:
tf.reset_default_graph()

In [5]:
with tf.variable_scope("placeholder") as scope:
    input_image = tf.placeholder(dtype=tf.float32,shape=[None,28,28,1],name="input")
    y = tf.placeholder(dtype=tf.float32,shape=[None,10],name="output")

In [6]:
with tf.variable_scope("conv1") as scope:
    w = tf.get_variable(name="weights",shape=[5,5,1,32],initializer=tf.contrib.layers.xavier_initializer_conv2d())
    b = tf.get_variable(name="biases",shape=[32],initializer=tf.random_normal_initializer())
    conv = tf.nn.conv2d(input_image,w,strides=[1,1,1,1],padding="SAME")
    relu = tf.nn.relu(conv+b,name=scope.name)
    conv1 = tf.nn.max_pool(relu,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")

In [7]:
with tf.variable_scope("conv2") as scope:
    w = tf.get_variable(name="weights",shape=[5,5,32,64],initializer=tf.contrib.layers.xavier_initializer_conv2d())
    b = tf.get_variable(name="biases",shape=[64],initializer=tf.random_normal_initializer())
    conv = tf.nn.conv2d(conv1,w,strides=[1,1,1,1],padding="SAME")
    relu = tf.nn.relu(conv+b,name=scope.name)
    conv2 = tf.nn.max_pool(relu,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")
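
Each 2x2 max-pool halves the spatial resolution, so the 28x28 input comes out of conv1 at 14x14 and out of conv2 at 7x7; that is where the 7*7*64 flattening size in the next cell comes from. A quick sanity check on the static shapes:

In [ ]:
# Pooling halves each spatial dimension at every block.
print(conv1.get_shape())   # (?, 14, 14, 32)
print(conv2.get_shape())   # (?, 7, 7, 64)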

In [8]:
with tf.variable_scope("fully_connected") as scope:
    w = tf.get_variable(name="weights",shape=[7*7*64,1024],initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable(name="biases",shape=[1024],initializer=tf.random_normal_initializer())
    out = tf.nn.relu(tf.matmul(tf.reshape(conv2,[-1,7*7*64]),w) + b)

In [9]:
with tf.variable_scope("dropout") as scope:
    keep_probs = tf.placeholder(dtype=tf.float32,name="probability")
    dropout = tf.nn.dropout(out,keep_prob=keep_probs)

In [10]:
with tf.variable_scope("softmax") as scope:
    w = tf.get_variable(name="weights",shape=[1024,10],initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable(name="biases",shape=[10],initializer=tf.random_normal_initializer())
    out = tf.matmul(dropout,w) + b
    logits = tf.nn.softmax(out)

In [11]:
with tf.variable_scope("loss") as scope:
    loss = tf.reduce_mean(-tf.reduce_sum(y*tf.log(logits),reduction_indices=[1]))
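
Taking the log of a softmax output can underflow when a predicted probability reaches 0, producing NaNs in the loss. A numerically safer alternative (a sketch, not wired into the graph above) is tf.nn.softmax_cross_entropy_with_logits, which fuses the log-softmax with the cross-entropy and therefore takes the raw pre-softmax scores:

In [ ]:
# Stable alternative: feed the pre-softmax logits and let TF fuse log-softmax + cross-entropy.
stable_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=logits))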

In [12]:
with tf.variable_scope("optimizer") as scope:
    optimizer = tf.train.AdagradOptimizer(0.01).minimize(loss)

In [13]:
with tf.variable_scope("accuracy") as scope:
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.arg_max(logits,1),tf.arg_max(y,1)),dtype=tf.float32))

In [14]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [15]:
n_training,n_features = mnist.train.images.shape
n_epochs = 5
batch_size = 100
n_batches = n_training//batch_size  # integer division so range() gets an int
n_testing = mnist.test.images.shape[0]

In [ ]:
for i in range(n_epochs):
    epoch_loss,epoch_acc = 0,0
    for _ in tqdm(range(n_batches)):
        x_batch,y_batch = mnist.train.next_batch(batch_size)
        x_batch = x_batch.reshape((batch_size,28,28,1))
        _,l,acc = sess.run([optimizer,loss,accuracy],feed_dict = {input_image:x_batch,y:y_batch,keep_probs:0.5})
        epoch_loss += l
        epoch_acc += acc
    # average over n_batches (not batch_size) to report per-epoch means
    print('Epoch: {}\tLoss: {}\tAccuracy: {}'.format(i,epoch_loss/n_batches,epoch_acc/n_batches))
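
Training accuracy alone is not a fair measure; the model should be scored on the held-out test set with dropout disabled (keep_probs=1.0). A minimal evaluation sketch, batched to keep memory bounded:

In [ ]:
# Evaluate on the test set with dropout turned off.
test_acc = 0
n_test_batches = n_testing//batch_size
for _ in range(n_test_batches):
    x_batch,y_batch = mnist.test.next_batch(batch_size)
    x_batch = x_batch.reshape((batch_size,28,28,1))
    test_acc += sess.run(accuracy,feed_dict={input_image:x_batch,y:y_batch,keep_probs:1.0})
print('Test accuracy: {}'.format(test_acc/n_test_batches))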

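In the spirit of Zeiler and Fergus, the most direct visualisation is to look at the learned first-layer filters (deeper layers need the deconvnet machinery from the paper). A minimal sketch that fetches the trained conv1 kernels and plots all 32 of the 5x5 filters:

In [ ]:
# Fetch the trained conv1 kernels, shape (5, 5, 1, 32), and plot them in a 4x8 grid.
with tf.variable_scope("conv1",reuse=True):
    kernels = sess.run(tf.get_variable("weights"))
fig,axes = plt.subplots(4,8,figsize=(8,4))
for i,ax in enumerate(axes.flat):
    ax.imshow(kernels[:,:,0,i],cmap='gray')
    ax.axis('off')
plt.show()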